R Markdown
if (!require("knitr")) install.packages("knitr")
if (!require("SummarizedExperiment")) install.packages("SummarizedExperiment")
if (!require("edgeR")) install.packages("edgeR")
if (!require("tidyr")) install.packages("tidyr")
if (!require("scales")) install.packages("scales")
if (!require("plotly")) install.packages("plotly")
# IMPORT VARIABLES
data_samples <- params$data_samples
data_counts <- params$data_counts
data_annotation <- params$data_annotation
setGeneName <- params$setGeneName
cpm_value <- params$cpm_value
excluded_samples <- params$excluded_samples
design_value <- params$design_value
matrix_value <- params$matrix_value
alpha <- params$alpha
# Get desgin
design_value <- reOrderDesign(matrix_value, design_value, data_samples)
# FILTER DATA IF NESSECARY
#Filter if nessecary
data_samples <- data_samples[!rownames(data_samples) %in% excluded_samples, , drop=FALSE]
data_counts <- data_counts[,!colnames(data_counts) %in% excluded_samples]
# SHOW VALUES OF VARIABLES
cpm_value
## [1] 1
excluded_samples
## [1] ""
design_value
## [1] "~0 + phenotype"
matrix_value
## [1] "pDC_unstimulated - pDC"
alpha
## [1] 0.05
# SHOW RAW DATA
save(data_counts, data_samples, file="test.RData")
se <- readCountsFromTable(data_counts, data_samples)
alignmentSummaryPlot(se)
complexityPlot(se)
# READ ALL FILES INTO SE
se <- readCountsFromTable(data_counts[!grepl('^__', rownames(data_counts)),], data_samples)
se <- addSamplesFromTableToSE(se, data_samples)
if (!is.null(data_annotation)) {
se <- addAnnotationsFromTableToSE(se, data_annotation)
}
# GET AND CREATE DGE
dge <- DGEList(counts = assay(se), samples = colData(se), genes = rowData(se))
row.names(dge$genes) <- row.names(dge$counts)
dge <- dge[ rowSums( abs( dge$counts ) ) > 1, ]
tempDge <- dge
tempDge$counts <- cpm(dge, log = TRUE)
countDistributionLinePlot(tempDge)
# GET SELECTED FEATURES
edger <- calcNormFactors( dge, method = "TMM")
counts <- cpm(edger, log = TRUE)
selectedFeatures <- rownames( edger )[ apply( counts, 1, function( v ) sum( v >= cpm_value ) ) >= 1/4 * ncol( counts ) ]
# GET HIGH EXPRESSED FEATURES
highExprDge <- dge[ selectedFeatures,, keep.lib.sizes = FALSE ]
# NORMALIZE DATA
normDge <- calcNormFactors( highExprDge, method = "TMM")
tempDge <- normDge
tempDge$counts <- cpm(normDge, log = TRUE)
countDistributionLinePlot(tempDge)
variancePcaPlot(tempDge)
# CREATE DESIGN
design <- model.matrix(eval(parse(text=design_value)), normDge$samples)
design <- reNameDesign(design, normDge)
matrix_value <- strsplit(matrix_value, ",")[[1]]
contr.matrix <- makeContrasts(contrasts = matrix_value, levels=design)
contr.matrix
## Contrasts
## Levels pDC_unstimulated - pDC
## pDC -1
## pDC_unstimulated 1
# PERFORM ANALYSIS
normDge <- estimateDisp(normDge, design)
edfit <- glmFit(normDge, design)
efit <- glmLRT(edfit, contrast = contr.matrix)
efit$DE <- decideTests(efit, p.value = alpha)
summary(efit$DE)
## -1*pDC 1*pDC_unstimulated
## Down 3107
## NotSig 7817
## Up 3245
# CREATE deTab TABLE
deTab <- topTags(efit, n=Inf)$table
deTab <- deTab[order(rownames(deTab)),]
deTab$DE <- c(efit$DE[order(rownames(efit$DE)),])
deTab$adj.P.Val <- p.adjust(deTab$PValue, method = "BH")
deTab <- rename(deTab, "avgLog2CPM" = "logCPM")
deTab <- rename(deTab, "avgLog2FC" = "logFC")
deTab <- rename(deTab, "P.Value" = "PValue")
# CHANGE GENE ID TO SYMBOL IF NESSECARY
#if (setGeneName == "symbol" && !is.null(data_annotation)) {
# tempCol <- rownames(deTab)
# rownames(deTab) <- make.names(deTab$geneName, unique=TRUE)
# deTab$geneName <- tempCol
# colnames(deTab)[1] <- "geneId"
# rownames(normDge$counts) <- rownames(deTab)
#}
# SHOW DGE RESULTS
ma_plot(deTab, NULL)
pValuePlot(deTab)
# SAVE ANALYSIS
normDge$counts <- cpm(normDge, log = TRUE)
save(deTab, normDge, file="analysis.RData")
# INFO
sessionInfo()
## R version 3.6.1 (2019-07-05)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.4 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8 LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C LC_ADDRESS=C LC_TELEPHONE=C LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] reshape2_1.4.3 org.Mm.eg.db_3.10.0 org.Hs.eg.db_3.10.0 AnnotationDbi_1.48.0 igraph_1.2.4.2
## [6] ReactomePA_1.30.0 graphite_1.32.0 DOSE_3.12.0 clusterProfiler_3.14.3 plotly_4.9.1
## [11] ggplot2_3.2.1 broom_0.5.4 scales_1.1.0 tidyr_1.0.2 DESeq2_1.26.0
## [16] SummarizedExperiment_1.16.1 DelayedArray_0.12.2 BiocParallel_1.20.1 matrixStats_0.55.0 Biobase_2.46.0
## [21] GenomicRanges_1.38.0 GenomeInfoDb_1.22.0 IRanges_2.20.2 S4Vectors_0.24.3 BiocGenerics_0.32.0
## [26] knitr_1.28 BiocManager_1.30.10 shinyjs_1.1 shinycssloaders_0.3 shinyWidgets_0.5.0
## [31] shinydashboard_0.7.1 shiny_1.4.0 edgeR_3.28.0 limma_3.42.1
##
## loaded via a namespace (and not attached):
## [1] backports_1.1.5 Hmisc_4.3-0 fastmatch_1.1-0 plyr_1.8.5 lazyeval_0.2.2 splines_3.6.1 crosstalk_1.0.0
## [8] urltools_1.7.3 digest_0.6.23 htmltools_0.4.0 GOSemSim_2.12.0 viridis_0.5.1 GO.db_3.10.0 magrittr_1.5
## [15] checkmate_1.9.4 memoise_1.1.0 cluster_2.1.0 annotate_1.64.0 graphlayouts_0.5.0 enrichplot_1.6.1 prettyunits_1.1.1
## [22] jpeg_0.1-8.1 colorspace_1.4-1 rappdirs_0.3.1 blob_1.2.1 ggrepel_0.8.1 xfun_0.12 dplyr_0.8.4
## [29] crayon_1.3.4 RCurl_1.98-1.1 jsonlite_1.6.1 graph_1.64.0 genefilter_1.68.0 survival_3.1-8 glue_1.3.1
## [36] polyclip_1.10-0 gtable_0.3.0 zlibbioc_1.32.0 XVector_0.26.0 DBI_1.1.0 Rcpp_1.0.3 viridisLite_0.3.0
## [43] xtable_1.8-4 progress_1.2.2 htmlTable_1.13.3 gridGraphics_0.4-1 reactome.db_1.70.0 foreign_0.8-75 bit_1.1-15.1
## [50] europepmc_0.3 Formula_1.2-3 DT_0.11 htmlwidgets_1.5.1 httr_1.4.1 fgsea_1.12.0 RColorBrewer_1.1-2
## [57] acepack_1.4.1 XML_3.99-0.3 pkgconfig_2.0.3 farver_2.0.3 nnet_7.3-12 locfit_1.5-9.1 ggplotify_0.0.4
## [64] tidyselect_1.0.0 rlang_0.4.4 later_1.0.0 munsell_0.5.0 tools_3.6.1 generics_0.0.2 RSQLite_2.2.0
## [71] ggridges_0.5.2 evaluate_0.14 stringr_1.4.0 fastmap_1.0.1 yaml_2.2.1 bit64_0.9-7 tidygraph_1.1.2
## [78] purrr_0.3.3 ggraph_2.0.0 packrat_0.5.0 nlme_3.1-143 mime_0.9 DO.db_2.9 xml2_1.2.2
## [85] compiler_3.6.1 rstudioapi_0.10 png_0.1-7 geneplotter_1.64.0 tibble_2.1.3 tweenr_1.0.1 stringi_1.4.5
## [92] lattice_0.20-38 Matrix_1.2-18 vctrs_0.2.2 pillar_1.4.3 lifecycle_0.1.0 triebeard_0.3.0 data.table_1.12.8
## [99] cowplot_1.0.0 bitops_1.0-6 httpuv_1.5.2 qvalue_2.18.0 R6_2.4.1 latticeExtra_0.6-29 promises_1.1.0
## [106] gridExtra_2.3 MASS_7.3-51.5 assertthat_0.2.1 withr_2.1.2 GenomeInfoDbData_1.2.2 mgcv_1.8-31 hms_0.5.3
## [113] grid_3.6.1 rpart_4.1-15 rmarkdown_2.1 rvcheck_0.1.7 ggforce_0.3.1 base64enc_0.1-3